/*** tabdata1.do

This do-file merges city-proper-level tabular data into a single data set.
Only data on aggregates of urban districts are included here.  Data on county
cities are incorporated into the us123.dta data set.

***/

* Session setup: start with no data in memory, disable --more-- pauses, and
* open a fresh text log (closing any log left open by an earlier run).
clear
set more off
capture log close
log using tabdata1.log, replace text

************************ 1. Prepare various tabular sources for merging *********************************

* --- Individual yearbook data -> temp_indyb.dta, keyed on city_code year ---
* Paths use forward slashes, which Stata accepts on every platform (including
* Windows) and which match the save paths used later in this file.
use ../../data/tabular_data_BJ/source/ind_yb_rz.dta

*** Fix these unit_status which are wrong
replace unit_status=5 if unit_code_08==130200 & inlist(year, 1995, 2000, 2005)
replace unit_status=4 if unit_code_08==130201 & inlist(year, 1995, 2000, 2005)
replace unit_status=5 if unit_code_08==130400 & year==1995
replace unit_status=4 if unit_code_08==130401 & year==1995
replace unit_status=5 if unit_code_08==510300 & year==2000
replace unit_status=4 if unit_code_08==510301 & year==2000

* Only unit_status 4 rows are carried forward
* (presumably the city proper / urban district aggregates -- confirm against the codebook).
keep if unit_status==4

*** These are empty, probably because they are cities that had not yet been promoted
drop if inlist(unit_code_08, 222401, 341601, 422801, 433101, 441901, 442001)

* Derive the merge key from the unit code:
*   last digit 0 -> unit code minus 100
*   last digit 1 -> unit code minus 1   (e.g. 130201 -> 130200)
* The second rule tests the already-updated city_code; rows changed by the
* first rule still end in 0, so they are not touched again.
gen city_code = unit_code_08
replace city_code = unit_code_08-100 if city_code-10*int(city_code/10)==0
replace city_code = unit_code_08-1 if city_code-10*int(city_code/10)==1

sort city_code year
save temp_indyb.dta, replace

* --- Printed yearbook data -> temp_2ycp.dta, keyed on city_code year ---
* Paths use forward slashes so the do-file runs on any platform, consistent
* with the save paths used later in this file.

* Re-save the ADCP file sorted on the merge key; the old-style merge below
* expects both master and using data to be sorted on the key variables.
use ../../data/tabular_data_BJ/source/PY_ADCP_final_rz.dta
sort unit_code year
save ../../data/tabular_data_BJ/source/PY_ADCP_final_rz.dta, replace

* Merge the ADCP file into the 2YCP file. With the update option, missing
* values in the master are filled in from the using data.
use ../../data/tabular_data_BJ/source/PY_2YCP_final_rz.dta
sort unit_code year
merge unit_code year using ../../data/tabular_data_BJ/source/PY_ADCP_final_rz, update
tab _merge
drop _merge

* Only the 1990 and 1995 rows are kept from this source
keep if inlist(year, 1990, 1995)

* Derive the merge key from the unit code:
*   last digit 0 -> unit code minus 100
*   last digit 1 -> unit code minus 1
gen city_code = unit_code
replace city_code = unit_code-100 if city_code-10*int(city_code/10)==0
replace city_code = unit_code-1 if city_code-10*int(city_code/10)==1

* Suffix the printed-yearbook series with _py
rename gdp gdp_py
rename givo givo_py
rename fdi fdi_py

sort city_code year
save temp_2ycp.dta, replace

* --- MI_4YCP data -> temp_mi3ycp.dta, keyed on city_code year ---
* Paths use forward slashes so the do-file runs on any platform, consistent
* with the save paths used later in this file.
use ../../data/tabular_data_BJ/source/MI_4YCP.dta
* Rename the series that are given source-specific names in the merged file
rename total_pop pop
rename avg_salary_of_staff_worker avgsalary_michigan
rename gdp gdp_michigan
rename gdp_sector2 gdp_sector2_mi
rename gdp_sector3 gdp_sector3_mi
sort city_code year
save temp_mi3ycp.dta, replace

* Re-save MI_ADCP sorted on the merge key so it can be used directly as the
* using file in the old-style merge in section 2.
use ../../data/tabular_data_BJ/source/MI_ADCP.dta
sort city_code year
save ../../data/tabular_data_BJ/source/MI_ADCP.dta, replace

* --- Asset data -> temp_qz3.dta, keyed on city_code year ---
* Paths use forward slashes so the do-file runs on any platform, consistent
* with the save paths used later in this file.

** Professor Zhang's dijishi.csv
insheet using ../../data/tabular_data_BJ/source/asset_dijishi.csv, clear
rename city05 city_code
rename gross_asset_cp asset_g_qz
rename net_asset_cp asset_n_qz
* city_name is not renamed here: it is discarded by the keep below.
keep city_code year asset_g_qz asset_n_qz
sort city_code year
save temp_qz3.dta, replace

*2005 Asset Data
* Only net assets are kept from this file, so asset_g_qz is missing for the
* rows appended from it.
insheet using ../../data/tabular_data_BJ/source/assets_05.csv, clear
rename city05 city_code
rename net_asset_cp asset_n_qz
keep city_code year asset_n_qz
append using temp_qz3.dta
sort city_code year
save temp_qz3.dta, replace


/****** 2. Merge Data Sets to Correspondence Table Using 
					City Proper/Year Units Only ********/

* Paths in this section use forward slashes so the do-file runs on any
* platform, consistent with the save paths used later in this file.

*** Create a data set at the city proper/year level using 2008/defn prefectures
use ../../data/correspondence_tables/generated/correspondence_82_10.dta
keep if unit_status==1
keep province_name province_code city_code city_name city05 year
* Keep one row per city_code/year. The data stay sorted on the key, as the
* old-style merges below require.
sort city_code year
by city_code year: keep if _n==1

*** Individual Yearbook
merge city_code year using temp_indyb.dta
tab year _merge
rename _merge mrg_ind

*** Printed Yearbooks
sort city_code year
merge city_code year using temp_2ycp.dta
tab year _merge
*** These are county cities before promotion to prefecture cities
drop if _merge==2
rename _merge mrg_PY2Y

sort city_code year
merge city_code year using temp_mi3ycp.dta
tab year _merge
*** This is the special county cities handled elsewhere or missing b/c not yet promoted
drop if _merge==2
rename _merge mrg_MI3Y

sort city_code year
merge city_code year using ../../data/tabular_data_BJ/source/MI_ADCP.dta
tab year _merge
*** This is 2 extra prefectures and extra years + county cities handled elsewhere
drop if _merge==2
rename _merge mrg_MIAD
* Discard the merge indicators collected so far
drop mrg_*

** Professor Zhang's dijishi_90_00_10_asset.csv (HY 08/09/12)

sort city_code year
merge city_code year using temp_qz3.dta
tab year _merge
** _merge=2 are outside our study area or for full prefecs only (no CP yet)
* Write the unmatched asset rows to their own file, keyed on city05, then
* return to the full merged data. preserve/restore is used for the round trip
* so that no scratch file is left behind in the working directory.
preserve
keep if _merge==2
keep city_code year asset_*
rename city_code city05
drop if asset_n_qz==.
rename asset_n_qz asset_n_qzx
rename asset_g_qz asset_g_qzx
sort city05 year
save ../../data/tabular_data_BJ/generated/asset_data.dta, replace
restore
drop if _merge==2
rename _merge mrg_QZ3

drop mrg_*

* Convert numeric sentinel codes to Stata missing values: -99 in the listed
* variables and ranges, then -9 across the totemp-fdi_py range. The ranges
* follow the variable order of the data set in memory.
mvdecode totemp total_pop-num_colstd emp_sect2-emp_sect3, mv(-99)
mvdecode totemp-fdi_py, mv(-9)

* Any remaining negative value in these series is set to missing.
foreach v in gdp_py gdp_sect2 gdp_sect3 givo givo_soe givo_colt {
	replace `v' = . if `v'<0
}

*** Label Variables
* Each variable label records the source the series comes from.

* Series tagged as individual yearbook
local indyb_vars gdp nnp gdp_sector1 nnp_sector1 gdp_sector2 nnp_sector2 ///
	gdp_sector3 nnp_sector3 tot_pop ///
	r08_gdp r08_nnp r08_gdp_sector2 r08_nnp_sector2 ///
	r08_gdp_sector3 r08_nnp_sector3 r08_tot_pop ///
	num_car dust_ems gas_ems so2 kproad aproad bustrly numhins ///
	arrived_fdi tot_emp tot_emp_sect2 tot_emp_sect3 ///
	giov giov_soecol tot_area giov_new giov_soecol_new
foreach v of local indyb_vars {
	label variable `v' "(ind yrb)"
}

* Series tagged as printed yearbook.
* NOTE(review): so2 appears in both lists, so its final label is "(py)";
* confirm which source it actually comes from.
local py_vars totemp agremp minemp manuemp egwemp constemp geolemp tstemp ///
	whoreemp fininsemp reemp ssemp hsemp ecmemp stemp pubemp ///
	gdp_py gdp_sect2 gdp_sect3 total_pop kmpr apr prpc num_bt so2 ///
	avgsalary num_colstd givo_py givo_soe givo_colt exp_lgt num_hedu ///
	publibcol culland area emp_sect2 emp_sect3 unit_status fixinvest fdi_py
foreach v of local py_vars {
	label variable `v' "(py)"
}

* Asset series
label variable asset_g_qz "Gross Assets (QZ)"
label variable asset_n_qz "Net Assets (QZ)"

* Write the merged city proper/year file, ordered by city05 and year
sort city05 year
save ../../data/tabular_data_BJ/generated/cpt.dta, replace

*** This is used to build the 1990 and 2010 GDP imputations
* Reduce the merged file to the GDP series for 1990 and 2010.
keep if inlist(year, 1990, 2010)
keep city05 year gdp_py gdp_sect2 gdp_sect3 gdp_michigan gdp_sector2_mi gdp_sector3_mi

* Prefix the printed-yearbook GDP series with "c"
foreach s in py sect2 sect3 {
	rename gdp_`s' cgdp_`s'
}

* For 2010 the values are taken from the gdp_michigan / *_mi series instead
replace cgdp_py = gdp_michigan if year==2010
replace cgdp_sect2 = gdp_sector2_mi if year==2010
replace cgdp_sect3 = gdp_sector3_mi if year==2010

* Keep only rows that have a total GDP value, then drop the helper series
keep if cgdp_py!=.
drop gdp_michigan gdp_sector2_mi gdp_sector3_mi

sort city05 year
save ../../data/tabular_data_BJ/generated/cp90_gdp.dta, replace

**** This is used to build the 2010 GDP imputations
* Extract the 2010 GDP series from MI_4YPF, keyed on city05 and year.
* The path uses forward slashes so the do-file runs on any platform,
* consistent with the save paths in this file.
use ../../data/tabular_data_BJ/source/MI_4YPF.dta
keep if year==2010
* Prefix the series with "p" and rename the key to city05
rename gdp pgdp_py
rename gdp_sector2 pgdp_sect2
rename gdp_sector3 pgdp_sect3
rename city_code city05
keep city05 year pgdp_*
sort city05 year
save ../../data/tabular_data_BJ/generated/pf10_gdp.dta, replace


* Close the log, then delete the intermediate temp_*.dta files written in
* section 1.
log close

foreach stub in 2ycp indyb mi3ycp qz3 {
	erase temp_`stub'.dta
}
